****************************************************************************
**		Authors: 	Or Tuttnauer & Liran Harsgor
**		Purpose: 	Produce Figure 3 in Kedar, Harsgor & Tuttnauer (JOP)  
**		input:		KHT_countrylevel.dta
*****************************************************************************

log using "figure 3.log", replace

set more off

use "KHT_countrylevel.dta", clear

generate MVZ= ((_n-1)/14)

*** 10th and 90th percentiles for center and dispersion variables
sum sr , detail
scalar sr10=r(p10)
scalar sr90=r(p90)

** Manually add jitter for scatter (rug)
	gen disp=0
	gen dispmeddm=lnmeddm+(runiform()-.5)/10
	gen pipe="|"
	
** Generate placeholedrs for points a, a', b', c' 
gen graph_x=.
gen graph_y=.
gen str2 graph_label=""

*** BASELINE MODEL
** 1. Run the Regression **
regress enps enpv lnmeddm vlmeddm smd mmm fused upper, robust
matrix coeffs_1=e(b)

** 2. Note points on prediction line **
** Calculate lnmeddm for effect_a=0.75 (a is a point on the baseline prediction line)
scalar effect_a=0.75

/*
calculate horizonal coordinate:
effect = b1+b3*lnmeddm, effect-b1 = b3*lnmeddm, lnmeddm = (effect-b1)/b3 
*/
scalar lnmeddm_a=(effect_a-coeffs_1[1,1])/coeffs_1[1,3]
scalar meddm_a=exp(lnmeddm_a)
display "point a is at effect " effect_a " and lnmeddm " lnmeddm_a " (meddm " meddm_a ")"
replace graph_x=lnmeddm_a in 1
replace graph_y=effect_a in 1
replace graph_label="A" in 1

** 3. Interaction Analysis **
* Save coefficients and V-CV matrix *	
	matrix b=e(b)
	matrix V=e(V)
	scalar b1=b[1,1]  		/* The first coefficient */
	scalar b3=b[1,3]		/* The interaction's coefficient */
	scalar varb1=V[1,1]		/* The variance of b1 */
	scalar varb3=V[3,3]		/* The variance of b3 */
	scalar covb1b3=V[1,3]	/* The covariance of b1 and b3 */

* Marginal effect of enpv on enps, and SE of the effect
	gen pred_meddm1=b1+b3*lnmeddm
	gen conbx= b1+b3*MVZ
	gen consx= sqrt(varb1+varb3*(MVZ^2)+2*covb1b3*MVZ)
* Confidence intervals
	gen a= 1.96*consx
	gen upperx= conbx+a
	gen lowerx= conbx-a

graph twoway 	 scatter disp dispmeddm if lnmeddm<3.1 & smd==0, ms(none) mlabel(pipe) mlabposition(0) mcolor(black) ///
			||	 scatter graph_y graph_x, mlabel(graph_label) mcolor(black) mlabpos(12) ///
			||	 line conbx  MVZ if MVZ<3.7, clwidth(medium) clpattern(solid) clcolor(gs10)  ///
			||   line upperx MV if MVZ<3.7, clpattern(solid) clwidth(thin) clcolor(gs10)  ///
			||   line lowerx MV if MVZ<3.7, clpattern(solid) clwidth(thin) clcolor(gs10)  /// 
            , legend(off) graphregion(fcolor(white) lcolor(white)) ///
            xtitle("Median district magnitude (logged)", size(3)) ///
            ytitle("Conversion of ENPV to ENPS", size(3))  ///
            scheme(s2mono) yscale(range(0.2 1.1)) xscale(range(0 3.7)) ylabel(0(.2)1) ///
			xlabel(0(1)3)

graph save int_baseline_sr.gph, replace
drop conbx consx a upperx lowerx MVZ


*** MODEL WITH SR
	set obs 2035000
	generate sr_f = (int((_n-1)/3700))/450
	gen MVZ = (mod(_n-1,3700))/1000
	
** 1. Run the regression **	
reg enps enpv lnmeddm sr vlmeddm vsr smd mmm fused upper, robust
matrix coeffs_2=e(b)

** 2. Note points on prediction line **
scalar lnmeddm_atag = (effect_a-coeffs_2[1,1]-coeffs_2[1,5]*sr90)/coeffs_2[1,4]
scalar lnmeddm_btag = (effect_a-coeffs_2[1,1]-coeffs_2[1,5]*sr10)/coeffs_2[1,4]
scalar effect_ctag = coeffs_2[1,1]+coeffs_2[1,4]*lnmeddm_atag+coeffs_2[1,5]*sr10
scalar meddm_atag = exp(lnmeddm_atag)
scalar meddm_btag = exp(lnmeddm_btag)

display "point a' is at effect " effect_a " and lnmeddm " lnmeddm_atag " (meddm " meddm_atag ")"
display "point c' is at effect " effect_a " and lnmeddm " lnmeddm_btag " (meddm " meddm_btag ")"
display "point b' is at effect " effect_ctag " and lnmeddm " lnmeddm_atag " (meddm " meddm_atag ")"
replace graph_x=lnmeddm_atag in 2
replace graph_x=lnmeddm_btag in 3
replace graph_x=lnmeddm_atag in 4
replace graph_y=effect_a in 2
replace graph_y=effect_a in 3
replace graph_y=effect_ctag in 4
replace graph_label="A'" in 2
replace graph_label="C'" in 3
replace graph_label="B'" in 4

** 3. Interaction Analysis **

* Save coefficients and V-CV matrix *	
	matrix b=e(b)
	matrix V=e(V)
	scalar b1=b[1,1]  		/* The first coefficient (enpv) */
	scalar b4=b[1,4]		/* The first interaction's coefficient */
	scalar b5=b[1,5]		/* The second interaction's coefficient */
	scalar varb1=V[1,1]		/* The variance of b1 */
	scalar varb4=V[4,4]		/* The variance of b4 */
	scalar varb5=V[5,5]		/* The variance of b5 */
	scalar covb1b4=V[1,4]	/* The covariance of b1 and b4 */
	scalar covb1b5=V[1,5]	/* The covariance of b1 and b5 */
	scalar covb4b5=V[4,5]	/* The covariance of b4 and b5 */
	
* Generate a figure with lnmeddm on X-axis, holding SR at 10th or 90th percentiles
	gen conbx10= b1+b4*MVZ+b5*sr10
	gen consx10= sqrt(varb1+varb4*(MVZ^2)+2*covb1b4*MVZ+(sr10^2)*varb5+ ///
	2*sr10*covb1b5+2*MVZ*sr10*covb4b5)
	gen conbx90= b1+b4*MVZ+b5*sr90
	gen consx90= sqrt(varb1+varb4*(MVZ^2)+2*covb1b4*MVZ+(sr90^2)*varb5+ ///
	2*sr90*covb1b5+2*MVZ*sr90*covb4b5)

	* Confidence intervals
	gen a10= 1.96*consx10
	gen upperx10= conbx10+a10
	gen lowerx10= conbx10-a10
	
	gen a90= 1.96*consx90
	gen upperx90= conbx90+a90
	gen lowerx90= conbx90-a90
	
* Generate a line at 0	
	gen yline=0
	gen xline=effect_a
	
* Draw the figure
	graph twoway scatter disp dispmeddm if lnmeddm<3.7 & smd==0 in f/3700, ms(none) mlabel(pipe) mlabposition(0) mcolor(black) ///
		||	 scatter sr_f MVZ if sr_f>lowerx10 & sr_f<upperx10, mcolor(gs14) msize(vsmall) msymbol(o) ///
		||	 scatter sr_f MVZ if sr_f<upperx90 & sr_f>lowerx90, mcolor(gs11) msize(vsmall) msymbol(o) ///
		||	 scatter sr_f MVZ if sr_f<upperx10 & sr_f>lowerx90, mcolor(gs9) msize(vsmall) msymbol(o) ///
		||	 scatter graph_y graph_x in 2, mlabel(graph_label) mcolor(black) mlabpos(12) msymbol(D) ///
		||	 scatter graph_y graph_x in 4, mlabel(graph_label) mcolor(black) mlabpos(6) msymbol(D) ///
		||	 scatter graph_y graph_x in 3, mlabel(graph_label) mcolor(black) mlabpos(12) msymbol(D) ///
		||	 line conbx10 MVZ if MVZ<3.7 in f/3700, clpattern(longdash) clwidth(medium) clcolor(black) ///
        ||   line conbx90 MVZ if MVZ<3.7 in f/3700, clpattern(shortdash) clwidth(medium) clcolor(black) ///
        ||   line upperx10 MVZ if MVZ<3.7 in f/3700, clpattern(solid) clwidth(thin) clcolor(gray) ///
        ||   line lowerx10 MVZ if MVZ<3.7 in f/3700, clpattern(solid) clwidth(thin) clcolor(gray)  ///
		||   line upperx90 MVZ if MVZ<3.7 in f/3700, clpattern(solid) clwidth(thin) clcolor(gray)  ///
        ||   line lowerx90 MVZ if MVZ<3.7 in f/3700, clpattern(solid) clwidth(thin) clcolor(gray) ///
			, legend(off) graphregion(fcolor(white) lcolor(white))  ///
           xtitle("Median district magnitude (logged)", size(3)) ///
            scheme(s2mono) yscale(range(0.2 1.1)) xscale(range(0 3.7)) ylabel(0(.2)1) ///
			xlabel(0(1)3)

graph save int_sr.gph, replace
drop conbx10 consx10 a10 upperx10 lowerx10 conbx90 consx90 a90 upperx90 ///
lowerx90 yline xline sr_f MVZ

graph combine int_baseline_sr.gph int_sr.gph, imargin(r=5 l=0) scheme(s1mono) ///
xcommon
graph save "figure 3.gph", replace

log close

